Code
library(gridExtra)
library(corrplot)
library(tidyverse)
library(estimatr)
library(skimr)
library(tidyr)
# Build `b`: one row per team-season from 1980 onward, holding identifying
# columns, per-game batting rates (H, HR, SO, R) and the win percentage.
#
# NOTE: the former `rm(list = ls())` call was dropped. It wipes the caller's
# workspace when the script is sourced, yet it neither detaches packages nor
# resets options, so it never provided a truly clean session anyway.
b <- read_csv("Teams.csv", show_col_types = FALSE) %>%
  # Keep seasons from 1980 on.
  filter(yearID >= 1980) %>%
  mutate(
    # Win percentage: wins divided by games played (left unrounded).
    winpct = W / G,
    # Per-game rates, rounded to 2 decimals. The columns are picked by name
    # rather than by position (previously 15:28 and 30:37), so this step no
    # longer depends on the column order of Teams.csv. Only the statistics
    # kept by the select() below are rescaled; every other column is
    # discarded there, so rescaling it would have no effect on `b`.
    across(c(H, HR, SO, R), ~ round(.x / G, 2))
  ) %>%
  select(yearID, name, franchID, H, HR, SO, R, winpct)

# Summary statistics for the numeric columns only.
b %>%
  skim() %>%
  yank("numeric")

# Output: Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| yearID | 0 | 1 | 2001.68 | 12.29 | 1980.00 | 1991.00 | 2002.00 | 2012.00 | 2022.00 | ▇▆▇▇▇ |
| H | 0 | 1 | 8.85 | 0.55 | 6.50 | 8.48 | 8.85 | 9.22 | 10.40 | ▁▁▇▇▂ |
| HR | 0 | 1 | 1.00 | 0.26 | 0.29 | 0.81 | 0.99 | 1.17 | 1.97 | ▁▇▇▃▁ |
| SO | 0 | 1 | 6.67 | 1.24 | 3.61 | 5.74 | 6.52 | 7.50 | 10.13 | ▁▇▇▅▁ |
| R | 0 | 1 | 4.53 | 0.53 | 3.10 | 4.16 | 4.50 | 4.87 | 6.23 | ▁▆▇▃▁ |
| winpct | 0 | 1 | 0.50 | 0.07 | 0.27 | 0.45 | 0.50 | 0.56 | 0.72 | ▁▅▇▆▁ |



